Tracking the coronavirus in my neighborhood

An analysis of neighborhoods in Orange County using data science methodologies

In [3]:
pip install matplotlib
Requirement already satisfied: matplotlib in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (3.3.0)
Note: you may need to restart the kernel to use updated packages.
Requirement already satisfied: pillow>=6.2.0 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from matplotlib) (7.2.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from matplotlib) (2.4.7)
Requirement already satisfied: numpy>=1.15 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from matplotlib) (1.19.1)
Requirement already satisfied: python-dateutil>=2.1 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from matplotlib) (2.8.1)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from matplotlib) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from matplotlib) (0.10.0)
Requirement already satisfied: six>=1.5 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from python-dateutil>=2.1->matplotlib) (1.15.0)
In [2]:
pip install altair
Requirement already satisfied: altair in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (4.1.0)
Requirement already satisfied: numpy in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from altair) (1.19.1)
Requirement already satisfied: pandas>=0.18 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from altair) (1.1.0)
Requirement already satisfied: toolz in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from altair) (0.10.0)
Requirement already satisfied: jinja2 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from altair) (2.11.2)
Requirement already satisfied: jsonschema in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from altair) (3.2.0)
Requirement already satisfied: entrypoints in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from altair) (0.3)
Requirement already satisfied: python-dateutil>=2.7.3 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from pandas>=0.18->altair) (2.8.1)
Requirement already satisfied: pytz>=2017.2 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from pandas>=0.18->altair) (2020.1)
Requirement already satisfied: MarkupSafe>=0.23 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from jinja2->altair) (1.1.1)
Requirement already satisfied: six>=1.11.0 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from jsonschema->altair) (1.15.0)
Requirement already satisfied: setuptools in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from jsonschema->altair) (47.1.0)
Requirement already satisfied: attrs>=17.4.0 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from jsonschema->altair) (19.3.0)
Requirement already satisfied: pyrsistent>=0.14.0 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from jsonschema->altair) (0.16.0)
Note: you may need to restart the kernel to use updated packages.
In [8]:
pip install folium
Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
Collecting requests
  Using cached requests-2.24.0-py2.py3-none-any.whl (61 kB)
Requirement already satisfied: jinja2>=2.9 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from folium) (2.11.2)
Requirement already satisfied: numpy in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from folium) (1.19.1)
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Collecting certifi>=2017.4.17
  Using cached certifi-2020.6.20-py2.py3-none-any.whl (156 kB)
Collecting chardet<4,>=3.0.2
  Using cached chardet-3.0.4-py2.py3-none-any.whl (133 kB)
Collecting idna<3,>=2.5
  Using cached idna-2.10-py2.py3-none-any.whl (58 kB)
Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1
  Using cached urllib3-1.25.10-py2.py3-none-any.whl (127 kB)
Requirement already satisfied: MarkupSafe>=0.23 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from jinja2>=2.9->folium) (1.1.1)
Installing collected packages: certifi, chardet, idna, urllib3, requests, branca, folium
Successfully installed branca-0.4.1 certifi-2020.6.20 chardet-3.0.4 folium-0.11.0 idna-2.10 requests-2.24.0 urllib3-1.25.10
Note: you may need to restart the kernel to use updated packages.
In [5]:
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import altair as alt
import folium as folium
In [6]:
ca_covid = pd.read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-place-totals.csv")
ca_covid.head()
Out[6]:
date county fips place confirmed_cases note x y
0 2020-08-11 Alameda 1.0 Address unknown 86 NaN NaN NaN
1 2020-08-11 Alameda 1.0 Alameda 209 NaN -122.274444 37.756111
2 2020-08-11 Alameda 1.0 Albany 38 NaN -122.297778 37.886944
3 2020-08-11 Alameda 1.0 Berkeley 432 NaN -122.272778 37.871667
4 2020-08-11 Alameda 1.0 Castro Valley 364 NaN NaN NaN
In [7]:
# Unique County Names
county_name_list = ca_covid['county'].unique()
county_name_list = county_name_list.tolist()
county_name_list.insert(0, 'Select One')
print('Unique County Names: %d' % len(county_name_list))
Unique County Names: 44
In [8]:
county_name = 'Orange'
ca_county = ca_covid[ca_covid['county']== county_name]
ca_county = ca_county.sort_values('date', ascending = True)
ca_county.head()
Out[8]:
date county fips place confirmed_cases note x y
100198 2020-03-27 Orange 59.0 Yorba Linda 10 NaN -117.834681 33.890894
100170 2020-03-27 Orange 59.0 Aliso Viejo 2 NaN -117.726298 33.568460
100171 2020-03-27 Orange 59.0 Anaheim 28 NaN -117.889722 33.836111
100172 2020-03-27 Orange 59.0 Brea 1 NaN -117.900292 33.916475
100173 2020-03-27 Orange 59.0 Buena Park 7 NaN -117.998139 33.867404
In [9]:
city_name = 'Irvine'
ca_city = ca_county[ca_county['place']== city_name]
ca_city = ca_city.sort_values('date', ascending = True)
ca_city.head()
Out[9]:
date county fips place confirmed_cases note x y
100181 2020-03-27 Orange 59.0 Irvine 33 NaN -117.8436 33.686502
99877 2020-03-28 Orange 59.0 Irvine 36 NaN -117.8436 33.686502
99565 2020-03-29 Orange 59.0 Irvine 38 NaN -117.8436 33.686502
99215 2020-03-30 Orange 59.0 Irvine 43 NaN -117.8436 33.686502
98834 2020-03-31 Orange 59.0 Irvine 50 NaN -117.8436 33.686502
In [10]:
style.use('ggplot')
ax = plt.gca()
df_county.plot(kind='line',x='date',y='confirmed_cases', color='green', ax=ax, figsize=(20,10))
plt.title('Number of Cases In ' + county_name +' County')
plt.xlabel('date')
plt.ylabel('confirmed_cases')
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-0f9d6873e045> in <module>
      1 style.use('ggplot')
      2 ax = plt.gca()
----> 3 df_county.plot(kind='line',x='date',y='confirmed_cases', color='green', ax=ax, figsize=(20,10))
      4 plt.title('Number of Cases In ' + county_name +' County')
      5 plt.xlabel('date')

NameError: name 'df_county' is not defined
In [6]:
import ipywidgets as widgets
from IPython.display import clear_output

default_county = 'Orange'
country_widget = widgets.Dropdown(
    options= county_name_list,
    value= default_county,
    description='county:',
)

def refresh_chart(county_name):
    # reset plot and widget
    clear_output(wait=True)
    display(country_widget)
    if county_name != 'Select One':
        ca_counties = pd.read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-place-totals.csv")
        ca_county = ca_counties[ca_counties['county'] == county_name]
        ca_county = ca_county.sort_values('date', ascending = True)
        style.use('ggplot')
        ax = plt.gca()
        ca_county.plot(kind='line',x='date',y='confirmed_cases', color='green', ax=ax, figsize=(20,10))
        plt.title('Number of Cases In ' + county_name +' County')
        plt.xlabel('date')
        plt.ylabel('confirmed_cases')
        plt.show()

def on_change(change):
    if change['type'] == 'change' and change['name'] == 'value':
        refresh_chart(change['new'])
        
country_widget.observe(on_change)
display(country_widget)
refresh_chart(default_county)
c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages\pandas\plotting\_matplotlib\core.py:1235: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_xticklabels(xticklabels)
In [11]:
ca_covid.head()
Out[11]:
date county fips place confirmed_cases note x y
0 2020-08-11 Alameda 1.0 Address unknown 86 NaN NaN NaN
1 2020-08-11 Alameda 1.0 Alameda 209 NaN -122.274444 37.756111
2 2020-08-11 Alameda 1.0 Albany 38 NaN -122.297778 37.886944
3 2020-08-11 Alameda 1.0 Berkeley 432 NaN -122.272778 37.871667
4 2020-08-11 Alameda 1.0 Castro Valley 364 NaN NaN NaN
In [12]:
ca_latest_county = ca_covid[ca_covid['date'] == ca_covid['date'].max()]
del ca_latest_county['date']
del ca_latest_county['confirmed_cases']
ca_latest_county.head()
Out[12]:
county fips place note x y
0 Alameda 1.0 Address unknown NaN NaN NaN
1 Alameda 1.0 Alameda NaN -122.274444 37.756111
2 Alameda 1.0 Albany NaN -122.297778 37.886944
3 Alameda 1.0 Berkeley NaN -122.272778 37.871667
4 Alameda 1.0 Castro Valley NaN NaN NaN
In [13]:
ca_county.query("confirmed_cases == 'NaN'")
Out[13]:
date county fips place confirmed_cases note x y
In [14]:
ca_county.query("x == 'NaN'")
Out[14]:
date county fips place confirmed_cases note x y
100196 2020-03-27 Orange 59.0 Unknown 34 NaN NaN NaN
100188 2020-03-27 Orange 59.0 Other 27 NaN NaN NaN
99893 2020-03-28 Orange 59.0 Unknown 46 NaN NaN NaN
99884 2020-03-28 Orange 59.0 Other 39 NaN NaN NaN
99581 2020-03-29 Orange 59.0 Unknown 50 NaN NaN NaN
... ... ... ... ... ... ... ... ...
2270 2020-08-09 Orange 59.0 Rossmoor 29 NaN NaN NaN
1491 2020-08-10 Orange 59.0 Silverado 42 NaN NaN NaN
1486 2020-08-10 Orange 59.0 Rossmoor 29 NaN NaN NaN
659 2020-08-11 Orange 59.0 Silverado 42 NaN NaN NaN
654 2020-08-11 Orange 59.0 Rossmoor 31 NaN NaN NaN

191 rows × 8 columns

In [15]:
ca_county.query("y == 'NaN'")
Out[15]:
date county fips place confirmed_cases note x y
100196 2020-03-27 Orange 59.0 Unknown 34 NaN NaN NaN
100188 2020-03-27 Orange 59.0 Other 27 NaN NaN NaN
99893 2020-03-28 Orange 59.0 Unknown 46 NaN NaN NaN
99884 2020-03-28 Orange 59.0 Other 39 NaN NaN NaN
99581 2020-03-29 Orange 59.0 Unknown 50 NaN NaN NaN
... ... ... ... ... ... ... ... ...
2270 2020-08-09 Orange 59.0 Rossmoor 29 NaN NaN NaN
1491 2020-08-10 Orange 59.0 Silverado 42 NaN NaN NaN
1486 2020-08-10 Orange 59.0 Rossmoor 29 NaN NaN NaN
659 2020-08-11 Orange 59.0 Silverado 42 NaN NaN NaN
654 2020-08-11 Orange 59.0 Rossmoor 31 NaN NaN NaN

191 rows × 8 columns

In [16]:
ca_covid.query("x > 0")
Out[16]:
date county fips place confirmed_cases note x y
3970 2020-08-07 San Mateo 81.0 Pacifica 126 NaN 122.480689 37.610177
9731 2020-07-31 San Mateo 81.0 Pacifica 109 NaN 122.480689 37.610177
15640 2020-07-24 San Mateo 81.0 Pacifica 100 NaN 122.480689 37.610177
22535 2020-07-16 San Mateo 81.0 Pacifica 93 NaN 122.480689 37.610177
27384 2020-07-10 San Mateo 81.0 Pacifica 81 NaN 122.480689 37.610177
31936 2020-07-03 San Mateo 81.0 Pacifica 75 NaN 122.480689 37.610177
38354 2020-06-25 San Mateo 81.0 Pacifica 59 NaN 122.480689 37.610177
43127 2020-06-19 San Mateo 81.0 Pacifica 51 NaN 122.480689 37.610177
48667 2020-06-12 San Mateo 81.0 Pacifica 47 NaN 122.480689 37.610177
54171 2020-06-05 San Mateo 81.0 Pacifica 45 NaN 122.480689 37.610177
55828 2020-06-03 San Mateo 81.0 Pacifica 43 NaN 122.480689 37.610177
58098 2020-05-31 San Mateo 81.0 Pacifica 43 NaN 122.480689 37.610177
59569 2020-05-29 San Mateo 81.0 Pacifica 43 NaN 122.480689 37.610177
61207 2020-05-27 San Mateo 81.0 Pacifica 39 NaN 122.480689 37.610177
64594 2020-05-22 San Mateo 81.0 Pacifica 39 NaN 122.480689 37.610177
65430 2020-05-21 San Mateo 81.0 Pacifica 39 NaN 122.480689 37.610177
70621 2020-05-14 San Mateo 81.0 Pacifica 38 NaN 122.480689 37.610177
71453 2020-05-13 San Mateo 81.0 Pacifica 38 NaN 122.480689 37.610177
73055 2020-05-11 San Mateo 81.0 Pacifica 38 NaN 122.480689 37.610177
75349 2020-05-08 San Mateo 81.0 Pacifica 38 NaN 122.480689 37.610177
In [17]:
ca_county.query("date.isnull()", engine='python')
Out[17]:
date county fips place confirmed_cases note x y
In [18]:
ca_county = ca_county.query("confirmed_cases != 'NaN' & x < 0 & x != 'NaN' & date.notnull()", engine='python')
ca_county.head()
Out[18]:
date county fips place confirmed_cases note x y
100198 2020-03-27 Orange 59.0 Yorba Linda 10 NaN -117.834681 33.890894
100170 2020-03-27 Orange 59.0 Aliso Viejo 2 NaN -117.726298 33.568460
100171 2020-03-27 Orange 59.0 Anaheim 28 NaN -117.889722 33.836111
100172 2020-03-27 Orange 59.0 Brea 1 NaN -117.900292 33.916475
100173 2020-03-27 Orange 59.0 Buena Park 7 NaN -117.998139 33.867404
In [19]:
ca_covid = ca_covid.query("confirmed_cases != 'NaN' & x < 0 & x != 'NaN' & date.notnull()", engine='python')
ca_covid.head()
Out[19]:
date county fips place confirmed_cases note x y
1 2020-08-11 Alameda 1.0 Alameda 209 NaN -122.274444 37.756111
2 2020-08-11 Alameda 1.0 Albany 38 NaN -122.297778 37.886944
3 2020-08-11 Alameda 1.0 Berkeley 432 NaN -122.272778 37.871667
5 2020-08-11 Alameda 1.0 Dublin 159 NaN -121.935833 37.702222
6 2020-08-11 Alameda 1.0 Emeryville 53 NaN -122.285278 37.831389
In [20]:
ca_city = ca_city.query("confirmed_cases != 'NaN' & x < 0 & x != 'NaN' & date.notnull()", engine='python')
ca_city.head()
Out[20]:
date county fips place confirmed_cases note x y
100181 2020-03-27 Orange 59.0 Irvine 33 NaN -117.8436 33.686502
99877 2020-03-28 Orange 59.0 Irvine 36 NaN -117.8436 33.686502
99565 2020-03-29 Orange 59.0 Irvine 38 NaN -117.8436 33.686502
99215 2020-03-30 Orange 59.0 Irvine 43 NaN -117.8436 33.686502
98834 2020-03-31 Orange 59.0 Irvine 50 NaN -117.8436 33.686502
In [41]:
import requests as r 
import json
from IPython.display import HTML

ca_json = r.get('https://raw.githubusercontent.com/OpenDataDE/State-zip-code-GeoJSON/master/ca_california_zip_codes_geo.min.json')
print(ca_json)

cali_map = folium.Map(location=[33.669445,-117.823059], zoom_start=12)
folium.Choropleth(geo_data=ca_json.json()['features'][512], 
                     fill_color='red',
                     fill_opacity=0.5,
                     line_opacity=0.8,
                     key_on='feature.properties.ZCTA5CE10').add_to(cali_map)

cali_map.save('plot_data_2.html')
HTML('<iframe src=plot_data_2.html width=800 height=500></iframe>')
<Response [200]>
Out[41]:
In [21]:
ca_city.head()
Out[21]:
date county fips place confirmed_cases note x y
100181 2020-03-27 Orange 59.0 Irvine 33 NaN -117.8436 33.686502
99877 2020-03-28 Orange 59.0 Irvine 36 NaN -117.8436 33.686502
99565 2020-03-29 Orange 59.0 Irvine 38 NaN -117.8436 33.686502
99215 2020-03-30 Orange 59.0 Irvine 43 NaN -117.8436 33.686502
98834 2020-03-31 Orange 59.0 Irvine 50 NaN -117.8436 33.686502
In [22]:
ca_city.shape
Out[22]:
(137, 8)
In [23]:
ca_city = ca_city.sort_values(by=["date"], ascending=True)
ca_city.tail()
Out[23]:
date county fips place confirmed_cases note x y
3775 2020-08-07 Orange 59.0 Irvine 1215 NaN -117.8436 33.686502
2946 2020-08-08 Orange 59.0 Irvine 1225 NaN -117.8436 33.686502
2253 2020-08-09 Orange 59.0 Irvine 1240 NaN -117.8436 33.686502
1469 2020-08-10 Orange 59.0 Irvine 1254 NaN -117.8436 33.686502
637 2020-08-11 Orange 59.0 Irvine 1274 NaN -117.8436 33.686502
In [24]:
ca_city.iloc[0]['date']
Out[24]:
'2020-03-27'
In [25]:
lastdate = ca_city.iloc[-1]['date']
lastdate
Out[25]:
'2020-08-11'
In [30]:
irvine_single_day = ca_city.query('date==@lastdate')
irvine_single_day = irvine_single_day.sort_values(by=["confirmed_cases"], ascending=True)
irvine_single_day
Out[30]:
date county fips place confirmed_cases note x y
637 2020-08-11 Orange 59.0 Irvine 1274 NaN -117.8436 33.686502
In [27]:
orange_single_day = ca_county.query('date==@lastdate')
orange_single_day = orange_single_day.sort_values(by=["confirmed_cases"], ascending=True)
orange_single_day.head()
Out[27]:
date county fips place confirmed_cases note x y
630 2020-08-11 Orange 59.0 Coto de Caza 30 NaN -117.587778 33.595833
663 2020-08-11 Orange 59.0 Villa Park 42 NaN -117.822072 33.814006
644 2020-08-11 Orange 59.0 Laguna Woods 45 NaN -117.725116 33.610170
652 2020-08-11 Orange 59.0 Rancho Mission Viejo 47 NaN -117.610000 33.600000
647 2020-08-11 Orange 59.0 Midway City 92 NaN -117.987000 33.744700
In [31]:
ca_single_day = ca_covid.query('date==@lastdate')
ca_single_day
Out[31]:
date county fips place confirmed_cases note x y
1 2020-08-11 Alameda 1.0 Alameda 209 NaN -122.274444 37.756111
2 2020-08-11 Alameda 1.0 Albany 38 NaN -122.297778 37.886944
3 2020-08-11 Alameda 1.0 Berkeley 432 NaN -122.272778 37.871667
5 2020-08-11 Alameda 1.0 Dublin 159 NaN -121.935833 37.702222
6 2020-08-11 Alameda 1.0 Emeryville 53 NaN -122.285278 37.831389
... ... ... ... ... ... ... ... ...
998 2020-08-11 Yolo 113.0 Winters 78 NaN -121.970833 38.525000
999 2020-08-11 Yolo 113.0 Woodland 830 NaN -121.773333 38.678611
1000 2020-08-11 Yuba 115.0 Marysville 312 NaN -121.583333 39.150000
1001 2020-08-11 Yuba 115.0 Olivehurst/Linda 245 NaN -121.550833 39.127778
1003 2020-08-11 Yuba 115.0 Plumas Lake & Wheatland 75 NaN -121.558056 39.020833

924 rows × 8 columns

In [32]:
ca_single_day = ca_single_day.query("confirmed_cases != 'NaN' & x < 0 & x != 'NaN' & date.notnull()", engine='python')
ca_single_day.head()
Out[32]:
date county fips place confirmed_cases note x y
1 2020-08-11 Alameda 1.0 Alameda 209 NaN -122.274444 37.756111
2 2020-08-11 Alameda 1.0 Albany 38 NaN -122.297778 37.886944
3 2020-08-11 Alameda 1.0 Berkeley 432 NaN -122.272778 37.871667
5 2020-08-11 Alameda 1.0 Dublin 159 NaN -121.935833 37.702222
6 2020-08-11 Alameda 1.0 Emeryville 53 NaN -122.285278 37.831389
In [33]:
ca_city = ca_city.query("place=='Irvine'")
ca_city
Out[33]:
date county fips place confirmed_cases note x y
100181 2020-03-27 Orange 59.0 Irvine 33 NaN -117.8436 33.686502
99877 2020-03-28 Orange 59.0 Irvine 36 NaN -117.8436 33.686502
99565 2020-03-29 Orange 59.0 Irvine 38 NaN -117.8436 33.686502
99215 2020-03-30 Orange 59.0 Irvine 43 NaN -117.8436 33.686502
98834 2020-03-31 Orange 59.0 Irvine 50 NaN -117.8436 33.686502
... ... ... ... ... ... ... ... ...
3775 2020-08-07 Orange 59.0 Irvine 1215 NaN -117.8436 33.686502
2946 2020-08-08 Orange 59.0 Irvine 1225 NaN -117.8436 33.686502
2253 2020-08-09 Orange 59.0 Irvine 1240 NaN -117.8436 33.686502
1469 2020-08-10 Orange 59.0 Irvine 1254 NaN -117.8436 33.686502
637 2020-08-11 Orange 59.0 Irvine 1274 NaN -117.8436 33.686502

137 rows × 8 columns

In [34]:
ca_covid.confirmed_cases.describe()
Out[34]:
count    97788.000000
mean       214.692519
std        540.082760
min          1.000000
25%         11.000000
50%         48.000000
75%        180.000000
max      14415.000000
Name: confirmed_cases, dtype: float64
In [35]:
ca_covid.groupby("county").confirmed_cases.describe()
Out[35]:
count mean std min 25% 50% 75% max
county
Alameda 1841.0 266.177078 610.556529 1.0 28.00 68.0 214.00 5245.0
Amador 380.0 7.473684 12.151065 1.0 1.00 4.0 8.00 95.0
Butte 233.0 90.952790 129.824658 3.0 13.00 28.0 115.00 568.0
Calaveras 120.0 11.825000 12.329961 1.0 3.00 5.0 16.00 54.0
Contra Costa 3328.0 111.404147 214.367733 1.0 18.00 44.0 96.00 2033.0
El Dorado 636.0 28.218553 54.067013 1.0 4.00 9.0 24.00 374.0
Fresno 1514.0 173.996697 609.034138 1.0 15.00 41.0 125.00 9051.0
Humboldt 87.0 50.701149 56.194661 3.0 8.00 27.0 84.00 218.0
Imperial 1141.0 291.491674 568.558416 1.0 9.00 56.0 267.00 3035.0
Kern 3549.0 181.492251 363.809904 1.0 4.00 40.0 180.00 3617.0
Kings 376.0 217.648936 230.440909 1.0 41.75 138.0 310.00 1321.0
Long Beach 968.0 329.537190 342.526643 14.0 108.00 213.0 426.00 2110.0
Los Angeles 42442.0 234.668065 488.252262 1.0 12.00 57.0 224.00 9210.0
Madera 64.0 414.015625 344.842430 58.0 115.00 267.5 657.25 1247.0
Marin 2044.0 60.608611 242.347411 1.0 1.00 10.0 30.00 2232.0
Mendocino 284.0 33.859155 61.872957 1.0 3.00 9.0 36.00 351.0
Merced 659.0 134.195751 250.287967 6.0 13.00 42.0 131.00 2204.0
Mono 177.0 27.389831 35.960687 1.0 2.00 12.0 35.00 141.0
Monterey 424.0 449.377358 647.891433 8.0 55.00 208.5 454.50 3147.0
Napa 1031.0 28.383123 76.418557 1.0 2.00 4.0 18.50 672.0
Nevada 373.0 21.420912 32.167527 1.0 1.00 12.0 29.00 146.0
Orange 5219.0 297.549147 770.900228 1.0 25.00 70.0 231.00 8062.0
Placer 318.0 248.386792 414.468507 11.0 25.00 88.5 206.00 1987.0
Plumas 58.0 5.965517 5.109213 1.0 1.00 4.5 11.75 16.0
Riverside 6606.0 189.335301 472.405938 1.0 10.00 38.0 146.00 6103.0
Sacramento 839.0 422.506555 1071.500489 1.0 26.00 99.0 324.00 7620.0
San Bernardino 4652.0 282.588779 638.587830 1.0 8.00 41.0 230.00 5833.0
San Diego 5035.0 260.156504 1038.772599 1.0 6.00 27.0 124.00 14415.0
San Francisco 2659.0 124.650621 178.036347 1.0 27.00 60.0 136.00 1160.0
San Joaquin 420.0 283.216667 735.896835 1.0 32.00 70.0 233.00 5847.0
San Luis Obispo 1136.0 49.548415 76.841787 5.0 9.00 19.0 47.25 616.0
San Mateo 620.0 87.132258 179.894086 1.0 1.00 13.0 74.00 1243.0
Santa Barbara 1193.0 192.270746 423.450142 1.0 15.00 45.0 124.00 3176.0
Santa Clara 1719.0 278.175684 861.836637 1.0 24.00 63.0 138.00 8571.0
Santa Cruz 357.0 85.182073 122.494888 5.0 15.00 42.0 91.00 649.0
Solano 598.0 184.742475 299.201473 1.0 19.25 52.5 211.00 1467.0
Sonoma 491.0 175.639511 285.924134 13.0 22.50 62.0 201.00 1689.0
Stanislaus 1043.0 177.315436 403.901847 1.0 12.00 45.0 154.00 3297.0
Sutter 38.0 336.815789 264.623226 68.0 88.00 262.0 588.25 812.0
Tulare 858.0 182.351981 305.845073 1.0 13.00 62.5 215.50 1870.0
Ventura 1775.0 125.690704 205.686130 1.0 7.00 40.0 160.00 1616.0
Yolo 417.0 143.342926 177.424035 4.0 36.00 76.0 155.00 830.0
Yuba 66.0 144.075758 80.047738 34.0 63.50 146.5 208.50 312.0
In [36]:
ca_covid.groupby("county").confirmed_cases.describe().sort_values(by=["max"], ascending=False)
Out[36]:
count mean std min 25% 50% 75% max
county
San Diego 5035.0 260.156504 1038.772599 1.0 6.00 27.0 124.00 14415.0
Los Angeles 42442.0 234.668065 488.252262 1.0 12.00 57.0 224.00 9210.0
Fresno 1514.0 173.996697 609.034138 1.0 15.00 41.0 125.00 9051.0
Santa Clara 1719.0 278.175684 861.836637 1.0 24.00 63.0 138.00 8571.0
Orange 5219.0 297.549147 770.900228 1.0 25.00 70.0 231.00 8062.0
Sacramento 839.0 422.506555 1071.500489 1.0 26.00 99.0 324.00 7620.0
Riverside 6606.0 189.335301 472.405938 1.0 10.00 38.0 146.00 6103.0
San Joaquin 420.0 283.216667 735.896835 1.0 32.00 70.0 233.00 5847.0
San Bernardino 4652.0 282.588779 638.587830 1.0 8.00 41.0 230.00 5833.0
Alameda 1841.0 266.177078 610.556529 1.0 28.00 68.0 214.00 5245.0
Kern 3549.0 181.492251 363.809904 1.0 4.00 40.0 180.00 3617.0
Stanislaus 1043.0 177.315436 403.901847 1.0 12.00 45.0 154.00 3297.0
Santa Barbara 1193.0 192.270746 423.450142 1.0 15.00 45.0 124.00 3176.0
Monterey 424.0 449.377358 647.891433 8.0 55.00 208.5 454.50 3147.0
Imperial 1141.0 291.491674 568.558416 1.0 9.00 56.0 267.00 3035.0
Marin 2044.0 60.608611 242.347411 1.0 1.00 10.0 30.00 2232.0
Merced 659.0 134.195751 250.287967 6.0 13.00 42.0 131.00 2204.0
Long Beach 968.0 329.537190 342.526643 14.0 108.00 213.0 426.00 2110.0
Contra Costa 3328.0 111.404147 214.367733 1.0 18.00 44.0 96.00 2033.0
Placer 318.0 248.386792 414.468507 11.0 25.00 88.5 206.00 1987.0
Tulare 858.0 182.351981 305.845073 1.0 13.00 62.5 215.50 1870.0
Sonoma 491.0 175.639511 285.924134 13.0 22.50 62.0 201.00 1689.0
Ventura 1775.0 125.690704 205.686130 1.0 7.00 40.0 160.00 1616.0
Solano 598.0 184.742475 299.201473 1.0 19.25 52.5 211.00 1467.0
Kings 376.0 217.648936 230.440909 1.0 41.75 138.0 310.00 1321.0
Madera 64.0 414.015625 344.842430 58.0 115.00 267.5 657.25 1247.0
San Mateo 620.0 87.132258 179.894086 1.0 1.00 13.0 74.00 1243.0
San Francisco 2659.0 124.650621 178.036347 1.0 27.00 60.0 136.00 1160.0
Yolo 417.0 143.342926 177.424035 4.0 36.00 76.0 155.00 830.0
Sutter 38.0 336.815789 264.623226 68.0 88.00 262.0 588.25 812.0
Napa 1031.0 28.383123 76.418557 1.0 2.00 4.0 18.50 672.0
Santa Cruz 357.0 85.182073 122.494888 5.0 15.00 42.0 91.00 649.0
San Luis Obispo 1136.0 49.548415 76.841787 5.0 9.00 19.0 47.25 616.0
Butte 233.0 90.952790 129.824658 3.0 13.00 28.0 115.00 568.0
El Dorado 636.0 28.218553 54.067013 1.0 4.00 9.0 24.00 374.0
Mendocino 284.0 33.859155 61.872957 1.0 3.00 9.0 36.00 351.0
Yuba 66.0 144.075758 80.047738 34.0 63.50 146.5 208.50 312.0
Humboldt 87.0 50.701149 56.194661 3.0 8.00 27.0 84.00 218.0
Nevada 373.0 21.420912 32.167527 1.0 1.00 12.0 29.00 146.0
Mono 177.0 27.389831 35.960687 1.0 2.00 12.0 35.00 141.0
Amador 380.0 7.473684 12.151065 1.0 1.00 4.0 8.00 95.0
Calaveras 120.0 11.825000 12.329961 1.0 3.00 5.0 16.00 54.0
Plumas 58.0 5.965517 5.109213 1.0 1.00 4.5 11.75 16.0
In [35]:
ca_county.groupby("place").confirmed_cases.describe().sort_values(by=["max"], ascending=False).head(50)
Out[35]:
count mean std min 25% 50% 75% max
place
Santa Ana 134.0 2233.000000 2345.613375 13.0 339.00 1286.5 3456.75 7496.0
Anaheim 134.0 2035.582090 2116.337844 28.0 323.50 1140.0 3215.25 6682.0
Garden Grove 134.0 623.149254 650.897894 4.0 101.50 361.5 933.75 2094.0
Orange 134.0 535.500000 565.014508 11.0 86.00 288.5 807.00 1793.0
Fullerton 134.0 505.462687 575.581637 7.0 75.25 219.5 765.75 1771.0
Huntington Beach 134.0 589.477612 551.300605 26.0 216.25 350.5 857.75 1753.0
Costa Mesa 134.0 331.283582 409.353619 8.0 38.50 128.5 491.00 1274.0
Irvine 134.0 387.619403 379.607411 33.0 127.75 204.0 539.00 1225.0
Buena Park 134.0 336.298507 334.214443 7.0 77.25 177.5 508.25 1092.0
Other/unknown 69.0 453.043478 307.945251 52.0 87.00 502.0 712.00 998.0
La Habra 134.0 256.940299 291.738805 1.0 47.25 119.5 357.75 958.0
Tustin 134.0 249.813433 288.630866 4.0 39.00 109.0 367.00 923.0
Newport Beach 134.0 286.656716 270.429986 32.0 97.00 145.0 404.25 917.0
Other* 2.0 832.000000 41.012193 803.0 817.50 832.0 846.50 861.0
Westminster 134.0 222.925373 224.220475 5.0 40.75 141.0 338.25 704.0
Placentia 134.0 216.194030 217.715151 5.0 53.00 129.5 307.00 698.0
Lake Forest 134.0 165.731343 200.348687 5.0 28.25 67.0 217.50 634.0
Mission Viejo 134.0 174.656716 193.070641 6.0 42.50 76.0 236.50 598.0
Unknown 8.0 237.250000 265.434818 34.0 45.50 50.5 550.25 564.0
Yorba Linda 133.0 161.345865 159.246761 10.0 49.00 83.0 226.00 513.0
Stanton 133.0 137.428571 145.968048 1.0 27.00 81.0 209.00 480.0
Cypress 134.0 126.283582 122.733565 6.0 42.00 77.0 170.25 427.0
Other 9.0 148.333333 184.482384 15.0 16.00 39.0 394.00 394.0
Fountain Valley 134.0 115.828358 118.567105 5.0 31.25 62.5 154.25 376.0
Brea 134.0 99.634328 115.454808 1.0 20.00 43.5 137.25 358.0
San Clemente 134.0 112.365672 98.893391 10.0 45.50 68.0 137.00 334.0
Laguna Niguel 134.0 89.500000 91.345724 11.0 32.00 39.5 113.00 297.0
San Juan Capistrano 134.0 87.917910 82.925190 9.0 24.75 58.0 106.50 291.0
Aliso Viejo 133.0 80.744361 93.361358 2.0 18.00 31.0 117.00 284.0
Rancho Santa Margarita 134.0 69.231343 83.832084 3.0 14.00 23.0 95.50 253.0
Laguna Hills 131.0 71.320611 72.561196 1.0 22.00 34.0 107.00 230.0
Seal Beach 134.0 69.343284 73.184914 1.0 11.00 23.0 126.50 224.0
Dana Point 134.0 58.335821 58.931073 6.0 22.00 29.0 67.50 191.0
Los Alamitos 119.0 81.747899 47.070488 1.0 41.00 86.0 121.00 156.0
Trabuco Canyon 126.0 45.119048 51.672911 5.0 11.00 17.0 70.25 155.0
Laguna Beach 131.0 61.923664 33.836625 22.0 37.00 46.0 76.50 142.0
La Palma 131.0 41.076336 37.851454 6.0 14.00 24.0 60.00 126.0
Ladera Ranch 128.0 37.554688 39.021054 6.0 12.00 16.0 50.00 121.0
Midway City 104.0 32.413462 22.871787 5.0 12.00 27.0 52.25 81.0
Other/Unknown 10.0 63.500000 5.254628 55.0 60.00 63.0 67.50 72.0
Rancho Mission Viejo 115.0 18.469565 15.672818 5.0 6.00 10.0 36.00 46.0
Laguna Woods 125.0 16.848000 12.125058 1.0 8.00 10.0 24.00 43.0
Villa Park 124.0 16.008065 13.596685 1.0 6.00 8.0 31.00 42.0
Silverado 34.0 29.352941 12.254578 8.0 16.00 38.0 39.00 40.0
Rossmoor 53.0 22.773585 6.018055 12.0 18.00 25.0 29.00 29.0
Coto de Caza 118.0 10.135593 7.013924 5.0 5.00 7.0 11.00 28.0
Corona Del Mar 1.0 12.000000 NaN 12.0 12.00 12.0 12.00 12.0
In [37]:
!pip install plotly
Requirement already satisfied: plotly in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (4.9.0)
Requirement already satisfied: six in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from plotly) (1.15.0)
Requirement already satisfied: retrying>=1.3.3 in c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages (from plotly) (1.3.3)
In [38]:
import plotly.express as px
Irvine = ca_covid.query("place == ['Irvine']")
px.bar(Irvine,
      x='date',
      y='confirmed_cases')
In [39]:
OC_Cities = ca_county.query("place == ['Santa Ana','Anaheim','Irvine']")
px.bar(OC_Cities,
      x='date',
      y='confirmed_cases',
      color = 'place')
In [40]:
px.scatter(orange_single_day,
           x='x',
           y='y',
           hover_name='place',
           color='confirmed_cases')
In [41]:
px.scatter(ca_single_day,
           x='x',
           y='y',
           hover_name='place',
           color='confirmed_cases')
In [42]:
px.scatter(ca_single_day,
           x='x',
           y='y',
           color='confirmed_cases', 
           size='confirmed_cases',
           size_max=40, 
           hover_name='place',
           title = 'Confirmed Cases for ' + lastdate)
In [43]:
px.scatter(ca_single_day,
           x='x',
           y='y',
           color='confirmed_cases', 
           size='confirmed_cases',
           size_max=40, 
           hover_name='place',
           color_continuous_scale = 'RdYlGn_r') # added _r to reverse color scheme
In [44]:
ca_single_day_mean = ca_single_day.confirmed_cases.mean()
ca_single_day_mean
Out[44]:
585.3290043290043
In [45]:
px.scatter(ca_single_day,
           x='x',
           y='y',
           color='confirmed_cases', 
           size='confirmed_cases',
           size_max=40, 
           hover_name='place',
           color_continuous_scale = 'RdYlGn_r', # added _r to reverse color scheme
           range_color = (0,ca_single_day_mean * 2) # double the mean
          )
In [46]:
orange_mean = ca_county.confirmed_cases.mean()
orange_mean
Out[46]:
297.5491473462349
In [47]:
px.scatter(ca_county,
           x='x',
           y='y',
           color='confirmed_cases', 
           size='confirmed_cases',
           size_max=40, 
           hover_name='place',
           animation_frame='date', # this creates a frame by frame animation by day
           color_continuous_scale = 'RdYlGn_r',
           range_color = (0,orange_mean*2))
In [48]:
fig = px.scatter_geo(ca_single_day,
           lon='x',
           lat='y',
           color='confirmed_cases', 
           size='confirmed_cases',
           size_max=40, 
           hover_name='place',
           scope='usa',
           color_continuous_scale = 'RdYlGn_r',
           range_color = (0,ca_single_day_mean * 2) # double the mean 
            )

fig.update_geos(fitbounds="locations") 
In [49]:
fig = px.scatter_geo(ca_county,
           lon='x',
           lat='y',
           color='confirmed_cases', 
           size='confirmed_cases',
           size_max=40, 
           hover_name='place',
           scope='usa',                     
           animation_frame='date',
           color_continuous_scale = 'RdYlGn_r',
           range_color = (0,orange_mean*2))

fig.update_geos(fitbounds="locations") 
In [71]:
# Set the map up
map_orange = folium.Map(location=[33.6846, -117.8265],
                        tiles = "Stamen Toner",
                        zoom_start = 9)
# Simple marker
folium.Marker([33.6846, -117.8265],
              popup='Irvine',
              icon=folium.Icon(color='green')
             ).add_to(map_orange)

# Circle marker
folium.CircleMarker([33.6846, -117.8265],
                    radius=30,
                    popup='Irvine',
                    color='red',
                    ).add_to(map_orange)

# Interactive marker
map_orange.add_child(folium.ClickForMarker(popup="Irvine"))

map_orange
Out[71]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [50]:
locations = ca_county[['x', 'y']]
locationlist = locations.values.tolist()
len(locationlist)
locationlist[7]
Out[50]:
[-117.698912, 33.467458]
In [51]:
# Create a Map instance
map = folium.Map(location=[34,-118], 
               zoom_start=8, 
               control_scale=True
                )

map
Out[51]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [52]:
# Create a Map instance with different basemap
map = folium.Map(location=[34,-118], 
               zoom_start=8, 
               control_scale=True,
               tiles='CartoDB dark_matter',
               attr= '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>')
map
Out[52]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [53]:
# add a circle
my_circle = folium.Circle(
    radius=10000, # this is in meters
    location=[34,-118],
    color='crimson',
    fill=True,
)
my_circle.add_to(map)
map
Out[53]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [54]:
orange_single_day.plot(x ='x', y='y', kind = 'scatter')
Out[54]:
<AxesSubplot:xlabel='x', ylabel='y'>
In [55]:
orange_single_day = orange_single_day.query("confirmed_cases != 'NaN' & x < 0 & x != 'NaN' & date.notnull()", engine='python')
orange_single_day.head()
Out[55]:
date county fips place confirmed_cases note x y
630 2020-08-11 Orange 59.0 Coto de Caza 30 NaN -117.587778 33.595833
663 2020-08-11 Orange 59.0 Villa Park 42 NaN -117.822072 33.814006
644 2020-08-11 Orange 59.0 Laguna Woods 45 NaN -117.725116 33.610170
652 2020-08-11 Orange 59.0 Rancho Mission Viejo 47 NaN -117.610000 33.600000
647 2020-08-11 Orange 59.0 Midway City 92 NaN -117.987000 33.744700
In [99]:
# loop through the rows in Los Angeles, and create a circle based on confirmed cases
for index, row in orange_single_day.iterrows():
    # set up the variables
    lat = row['y']
    lon = row['x']
    label = str(row['confirmed_cases']) + ' confirmed cases in ' + row['place']
    size = row['confirmed_cases']
    
    # create a circle for every row
    circle = folium.Circle(
        radius=size,
        location=[lat,lon],
        tooltip = label,
        color='crimson',
        fill = True
    )
    circle.add_to(map)
# show the map
map
Out[99]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [56]:
import altair as alt
In [57]:
# reset the map (only way to get rid of circles)
map = folium.Map(location=[34,-118], 
                zoom_start=8, 
                control_scale=True,
                tiles='CartoDB dark_matter',
                attr= '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>')
In [58]:
# create a function to create circles, and also add a chart in the popup window (a lot here, will break it up later)
def createCircle(lat,lon,size,place,label):
    # create a bar chart for each circle
    bar = alt.Chart(ca_county.query('place == @place')).mark_bar().encode(
        x=alt.X('date', axis=alt.Axis(labels=False)), # turn the labels off because there are too many
        y='confirmed_cases',
        color='confirmed_cases',
        tooltip = ['date','place','confirmed_cases']
    ).properties(width=400,height=200)

    # add the bar chart as a folium feature
    vega = folium.features.VegaLite(
        bar,
        width=600,
        height=200,
    )

    # create the circle
    circle = folium.Circle(
        radius=size,
        location=[lat,lon],
        tooltip = label,
        color='crimson',
        fill = True
    )

    # create a popup
    popup = folium.Popup()

    # add the chart to the popup
    vega.add_to(popup)
    
    # add the popup to the circle
    popup.add_to(circle)
    
    # add the circle to the map
    circle.add_to(map)
    
In [59]:
# loop through the rows in Los Angeles, and create a circle based on confirmed cases
for index, row in orange_single_day.iterrows():
    label = str(row['confirmed_cases']) + ' confirmed cases in ' + row['place']
    createCircle(row['y'],row['x'],row['confirmed_cases'],row['place'],label)
In [60]:
# show the map
map
Out[60]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [61]:
# save it!
map.save('index.html')
In [62]:
orange_single_day.head()
Out[62]:
date county fips place confirmed_cases note x y
630 2020-08-11 Orange 59.0 Coto de Caza 30 NaN -117.587778 33.595833
663 2020-08-11 Orange 59.0 Villa Park 42 NaN -117.822072 33.814006
644 2020-08-11 Orange 59.0 Laguna Woods 45 NaN -117.725116 33.610170
652 2020-08-11 Orange 59.0 Rancho Mission Viejo 47 NaN -117.610000 33.600000
647 2020-08-11 Orange 59.0 Midway City 92 NaN -117.987000 33.744700
In [63]:
orange_single_day.tail()
Out[63]:
date county fips place confirmed_cases note x y
634 2020-08-11 Orange 59.0 Fullerton 1871 NaN -117.925205 33.871972
650 2020-08-11 Orange 59.0 Orange 1896 NaN -117.887465 33.807614
635 2020-08-11 Orange 59.0 Garden Grove 2261 NaN -117.940639 33.773220
626 2020-08-11 Orange 59.0 Anaheim 7093 NaN -117.889722 33.836111
657 2020-08-11 Orange 59.0 Santa Ana 8062 NaN -117.881389 33.740833
In [64]:
style.use('ggplot')
ax = plt.gca()
orange_single_day.plot(kind='line',x='place',y='confirmed_cases', color='green', ax=ax, figsize=(20,10))

plt.title('Confirmed Cases in Orange County')
plt.xlabel('Place')
plt.ylabel('Count')
plt.show()
c:\users\sunil\appdata\local\programs\python\python38-32\lib\site-packages\pandas\plotting\_matplotlib\core.py:1235: UserWarning:

FixedFormatter should only be used together with FixedLocator

In [66]:
ca_county = ca_county.query("confirmed_cases != 'NaN' & x < 0 & x != 'NaN' & date.notnull()", engine='python')
ca_county.head()
Out[66]:
date county fips place confirmed_cases note x y
100198 2020-03-27 Orange 59.0 Yorba Linda 10 NaN -117.834681 33.890894
100170 2020-03-27 Orange 59.0 Aliso Viejo 2 NaN -117.726298 33.568460
100171 2020-03-27 Orange 59.0 Anaheim 28 NaN -117.889722 33.836111
100172 2020-03-27 Orange 59.0 Brea 1 NaN -117.900292 33.916475
100173 2020-03-27 Orange 59.0 Buena Park 7 NaN -117.998139 33.867404
In [154]:
 
In [ ]: